# Replication package for 
# "The Economic Leverage of International Organizations in Interstate Disputes"
# Johannes Karreth
# June 30, 2017
# jkarreth@ursinus.edu

# This file: 40_hligo_joint.R
# Purpose: Analyses of dyadic joint memberships in IGOs with high leverage
 
rm(list = ls())

# setwd("...")

library("rio")
library("dplyr")
library("ggplot2")
library("data.table")
library("pscl")

# Source in functions
source("Functions/theme_jk.R")

# Data

dat <- import("hligo_joint.csv")
dat$idptdistance <- (dat$idptproximity * (-1)) + max(dat$idptproximity, na.rm = TRUE)
dat$idptdistance_l1 <- (dat$idptproximity_l1 * (-1)) + max(dat$idptproximity_l1, na.rm = TRUE)

# HLIGOs in the system

igolife <- import("Sources/IGO_igounit_v2.3.short.csv")
igo <- import("Sources/IGO_igounit_v2.3.narrow.csv")
igo_num <- import("igo_overlap_num.csv")
hligo <- igo[igo$ionum %in% igo_num$igo_lev3_count_use, ]

years <- data.frame(year = sort(seq(min(hligo$year), max(hligo$year), by = 1)),
                    ionum = rep(igo_num[is.na(igo_num$igo_lev3_count_use) == FALSE, ]$igo_lev3_count_use, length(seq(min(hligo$year), max(hligo$year), by = 1))))

hligo2 <- merge(x = hligo, y = years, by = c("ionum", "year"), all.x = TRUE, all.y = TRUE)
hligo2 <- mutate(group_by(hligo2, ionum), startyear = max(sdate, na.rm = TRUE))
hligo2 <- hligo2[hligo2$year >= hligo2$startyear, ]
hligo2$hligo <- 1

hligos_alive <- summarize(group_by(hligo2, year),
                          igo_count = sum(hligo))

dat <- merge(x = dat, y = hligos_alive, by = "year", all.x = TRUE, all.y = FALSE)

# Describe the data

table(dat$igo_lev3_count_use)

# Time trends

p.DT <- as.data.table(dat[, c("dyadid", "year", "igo_lev3_count_use")])
p.1 <- p.DT[, quantile(igo_lev3_count_use, probs = c(0.1)), by = c("year")]
p.5 <- p.DT[, quantile(igo_lev3_count_use, probs = c(0.5)), by = c("year")]
p.9 <- p.DT[, quantile(igo_lev3_count_use, probs = c(0.9)), by = c("year")]
p.mean <- p.DT[, mean(igo_lev3_count_use), by = c("year")]
p.dyads <- p.DT[, length(dyadid), by = c("year")]
p.dat <- data.frame(year = c(1946:2005), dyads = p.dyads$V1, igo_lev3_p10 = p.1$V1, igo_lev3_p50 = p.5$V1, igo_lev3_p90 = p.9$V1, igo_lev3_mean = p.mean$V1)
p.dat <- merge(x = p.dat, y = hligos_alive, by = "year", all.x = TRUE, all.y = FALSE)
p.dat

# Estimate zero-inflated Poisson regressions

# MID 1 level

m5.mid1.zip <- zeroinfl(igo_lev3_count_use ~ MID1_l10_d + gdppc_low_ln_l1 + tradedep_low_ln_l1 + atopally_l1 + jointpol7_l1 + idptdistance_l1 + offset(log(igo_count)), data = dat, dist = "poisson")

# MID 4 level
m6.mid4.zip <- zeroinfl(igo_lev3_count_use ~ MID4_l10_d + gdppc_low_ln_l1 + tradedep_low_ln_l1 + atopally_l1 + jointpol7_l1 + idptdistance_l1 + offset(log(igo_count)), data = dat, dist = "poisson")


# Predicted values & first differences
m5.sim.dat <- expand.grid(0:1, mean(dat$gdppc_low_ln_l1, na.rm = TRUE), mean(dat$tradedep_low_ln_l1, na.rm = TRUE), median(dat$atopally_l1, na.rm = TRUE), median(dat$jointpol7_l1, na.rm = TRUE), mean(dat$idptdistance_l1, na.rm = TRUE), median(dat$igo_count, na.rm = TRUE))
colnames(m5.sim.dat) <- c("MID1_l10_d", "gdppc_low_ln_l1", "tradedep_low_ln_l1", "atopally_l1", "jointpol7_l1", "idptdistance_l1", "igo_count")
m5.sim.dat$igo_lev3_count_hat <- predict(m5.mid1.zip, m5.sim.dat, type = "count")
m5.sim.dat_mid <- m5.sim.dat[, c("MID1_l10_d", "igo_lev3_count_hat")]

m5.sim.dat <- expand.grid(0, mean(dat$gdppc_low_ln_l1, na.rm = TRUE), mean(dat$tradedep_low_ln_l1, na.rm = TRUE), median(dat$atopally_l1, na.rm = TRUE), median(dat$jointpol7_l1, na.rm = TRUE), quantile(dat$idptdistance_l1, probs = c(0.05, 0.95), na.rm = TRUE), median(dat$igo_count, na.rm = TRUE))
colnames(m5.sim.dat) <- c("MID1_l10_d", "gdppc_low_ln_l1", "tradedep_low_ln_l1", "atopally_l1", "jointpol7_l1", "idptdistance_l1", "igo_count")
m5.sim.dat$igo_lev3_count_hat <- predict(m5.mid1.zip, m5.sim.dat, type = "count")
m5.sim.dat_idpt <- m5.sim.dat[, c("idptdistance_l1", "igo_lev3_count_hat")]

m6.sim.dat <- expand.grid(0:1, mean(dat$gdppc_low_ln_l1, na.rm = TRUE), mean(dat$tradedep_low_ln_l1, na.rm = TRUE), median(dat$atopally_l1, na.rm = TRUE), median(dat$jointpol7_l1, na.rm = TRUE), mean(dat$idptdistance_l1, na.rm = TRUE), median(dat$igo_count, na.rm = TRUE))
colnames(m6.sim.dat) <- c("MID4_l10_d", "gdppc_low_ln_l1", "tradedep_low_ln_l1", "atopally_l1", "jointpol7_l1", "idptdistance_l1", "igo_count")
m6.sim.dat$igo_lev3_count_hat <- predict(m6.mid4.zip, m6.sim.dat, type = "count")
m6.sim.dat_mid <- m6.sim.dat[, c("MID4_l10_d", "igo_lev3_count_hat")]

m6.sim.dat <- expand.grid(0, mean(dat$gdppc_low_ln_l1, na.rm = TRUE), mean(dat$tradedep_low_ln_l1, na.rm = TRUE), median(dat$atopally_l1, na.rm = TRUE), median(dat$jointpol7_l1, na.rm = TRUE), quantile(dat$idptdistance_l1, probs = c(0.05, 0.95), na.rm = TRUE), median(dat$igo_count, na.rm = TRUE))
colnames(m6.sim.dat) <- c("MID4_l10_d", "gdppc_low_ln_l1", "tradedep_low_ln_l1", "atopally_l1", "jointpol7_l1", "idptdistance_l1", "igo_count")
m6.sim.dat$igo_lev3_count_hat <- predict(m6.mid4.zip, m6.sim.dat, type = "count")
m6.sim.dat_idpt <- m6.sim.dat[, c("idptdistance_l1", "igo_lev3_count_hat")]

zip_fd <- data.frame(pred_count_lo = c(m5.sim.dat_mid[1, 2], m6.sim.dat_mid[1, 2], m5.sim.dat_idpt[1, 2]), 
                     pred_count_hi = c(m5.sim.dat_mid[2, 2], m6.sim.dat_mid[2, 2], m5.sim.dat_idpt[2, 2]),
                     xvar = factor(x = c(1:3), labels = c("Any MID\n(last 10 years)", "MID with use of force\n(last 10 years)", "Ideal point distance\n(low to high)")))

# Plot differences 

p <- ggplot(data = zip_fd, aes(x = pred_count_lo, y = xvar)) + geom_segment(aes(xend = pred_count_hi, yend = xvar), arrow = arrow(length = unit(0.05, "inches"), ends = "last")) + ylab("") + xlab("Predicted count of memberships in IGOs with leverage,\nwith all other predictors set at baseline") + scale_x_continuous(limits = c(1, 4)) + theme_jk()

ggsave(p, file = "Output_Tables-and-Figures/hligos_zip_predcount.pdf", width = 6, height = 2)

# Table

texreg(list(m5.mid1.zip, m6.mid4.zip),
	file = "Output_Tables-and-Figures/hligo_joint_zip_table.tex", 
	stars = 0.1,
	reorder.coef = c(2, 8, 3, 4, 5, 6, 7, 1),
	omit.coef = "Zero model",
	custom.model.names = c("Model 1", "Model 2"),
	digits = 3,
	leading.zero = TRUE,
	center = TRUE, 
	caption = "Determinants of the number of joint memberships in High-Leverage IGOs, 1951-2001.",
	caption.above = TRUE,
	custom.note = "%stars, one-tailed tests. Outcome variable: number of joint memberships in IGOs with leverage in a given dyad-year. Estimation method: Zero-inflated poisson regression via maximum likelihood. Cell entries: Coefficient point estimates and standard errors for count equation.",
	label = "tab:hligo_joint_zip",
	dcolumn = TRUE,
	booktabs = TRUE,
	use.packages = FALSE,
	table = TRUE
	)